#####################
## F statistic
#####################


load("metadata/iv_replicate.rds")
names(d)
dim(d)


# count
sum(d$f_report < d$f_boot, na.rm = TRUE)
sum(d$f_report < d$f_boot, na.rm = TRUE) / sum(is.na(d$f_report) == FALSE)

sum(d$f_report < d$f_effective, na.rm = TRUE)
sum(d$f_report < d$f_effective, na.rm = TRUE) / sum(is.na(d$f_report) == FALSE)


# cluster/group structure
d$cluster <- ifelse(is.na(d$f_cluster) == TRUE, 0, 1)
table(d$cluster)
cl <- which(d$cluster == 1)
d$f_cluster[which(d$cluster == 0)] <- d$f_robust[which(d$cluster == 0)]
sum(is.na(d$f_cluster))

# replicated F
d$f_replicated <- d$f_standard
d$f_replicated[which(d$original_estimation_f_type == "robust")] <- d$f_robust[which(d$original_estimation_f_type == "robust")]
d$f_replicated[which(d$original_estimation_f_type == "cluster")] <- d$f_cluster[which(d$original_estimation_f_type == "cluster")]


# fill not reported F
d$f_report_missing <- ifelse(is.na(d$f_report) == TRUE, 1, 0)
d$f_report2 <- d$f_report
d$f_report2[d$f_report_missing == 1] <- d$f_replicated[d$f_report_missing == 1]
# d$f_report[d$f_report_missing == 1] <- 1
d[which(d$f_report_missing == 1), c("name", "f_replicated")]

#######################
# Figure 2
#######################

pdf("graphs/Figure2a.pdf", width = 7, height = 7)
col <- rep("#77777750", nrow(d))
col[cl] <- "#33333390"
col[d$f_report_missing == 1] <- "#FF000060"
par(mar = c(4, 4, 1, 1))
plot(1,
  cex = 0.8, type = "n",
  xlim = c(-0.1, 4), ylim = c(-0.1, 4),
  xlab = "Original F Statistic (Replicated)",
  ylab = "Effective F Statistic", axes = FALSE
)
abline(c(0, 1), col = 2)
abline(v = 1, h = 1, col = "gray", lty = 2, lwd = 2)
points(log10(d$f_replicated[-cl]), log10(d$f_effective[-cl]),
  col = col[-cl], pch = 16,
  cex = 1.2, lwd = 0.8
)
points(log10(d$f_replicated[-cl]), log10(d$f_effective[-cl]), cex = 1.2, lwd = 0.8)
points(log10(d$f_replicated[cl]), log10(d$f_effective[cl]),
  col = col[cl], pch = 17,
  cex = 1.2, lwd = 0.8
)
points(log10(d$f_replicated[cl]), log10(d$f_effective[cl]), cex = 1.2, lwd = 0.8, pch = 2)
box()
axis(1, at = c(-1:4), labels = c(0.1, 1, 10, 100, 1000, 10000))
axis(2, at = c(-1:4), labels = c(0.1, 1, 10, 100, 1000, 10000))
graphics.off()



pdf("graphs/Figure2b.pdf", width = 7, height = 7)
col <- rep("#77777750", nrow(d))
col[cl] <- "#33333390"
col[d$f_report_missing == 1] <- "#FF000060"
par(mar = c(4, 4, 1, 1))
plot(1,
  cex = 0.8, type = "n",
  xlim = c(-0.1, 4), ylim = c(-0.1, 4),
  xlab = "Original F Statistic (Replicated)",
  ylab = "Bootstrapped F Statistic", axes = FALSE
)
abline(c(0, 1), col = 2)
abline(v = 1, h = 1, col = "gray", lty = 2, lwd = 2)
points(log10(d$f_replicated[-cl]), log10(d$f_boot[-cl]),
  col = col[-cl], pch = 16,
  cex = 1.2, lwd = 0.8
)
points(log10(d$f_replicated[-cl]), log10(d$f_boot[-cl]), cex = 1.2, lwd = 0.8)
points(log10(d$f_replicated[cl]), log10(d$f_boot[cl]),
  col = col[cl], pch = 17,
  cex = 1.2, lwd = 0.8
)
points(log10(d$f_replicated[cl]), log10(d$f_boot[cl]), cex = 1.2, lwd = 0.8, pch = 2)
box()
axis(1, at = c(-1:4), labels = c(0.1, 1, 10, 100, 1000, 10000))
axis(2, at = c(-1:4), labels = c(0.1, 1, 10, 100, 1000, 10000))
graphics.off()



##########################
## F statistics (appendix)
##########################

# Figure A1

bin <- 0.5
d$log_f <- floor(log10(d$f_effective) / bin) * bin
# construct counts
t0 <- c(table(d$log_f))
m0 <- c(table(d$log_f[d$experimental == 1]))
bin <- seq(0, 5.2, by = bin)
dat <- matrix(0, length(bin), 2)
rownames(dat) <- 10^bin
for (i in 1:length(bin)) {
  this.bin <- bin[i]
  for (j in 1:length(t0)) {
    if (this.bin == as.numeric(names(t0)[j])) {
      dat[i, 1] <- t0[j]
    }
  }
  for (j in 1:length(m0)) {
    if (this.bin == as.numeric(names(m0)[j])) {
      dat[i, 2] <- m0[j]
    }
  }
}
dat

pdf("graphs/FigureA1.pdf", height = 5, width = 8)
mycol <- c("gray70", "gray30")
par(mar = c(4, 4, 1, 1))
barplot(dat[, 1], space = c(0.7), border = 1, ylim = c(0, 25), col = mycol[1], xaxt = 'n')
barplot(dat[, 2], space = c(0.7), border = 1, add = TRUE, col = mycol[2])
box();mtext("# Studies", 2, 2.5);
mtext("First-Stage Effective F Statistic, Natural Log Scale", 1, 2.5)
legend("topright", c("Observational", "Experimental"),
  cex = 1.0,
  fill = mycol, seg.len = 1, bty = "n"
)
graphics.off()

median(d$f_effective[which(d$experimental == 0)])
median(d$f_effective[which(d$experimental == 1)])


# Figure A3

pdf("graphs/FigureA3.pdf", width = 7, height = 7)
col <- rep("#77777750", nrow(d))
col[cl] <- "#33333390"
col[d$f_report_missing == 1] <- "#FF000060"
par(mar = c(4, 4, 1, 1))
plot(1,
  cex = 0.8, type = "n",
  xlim = c(-0.1, 4), ylim = c(-0.1, 4),
  xlab = "Reported F Statistic",
  ylab = "Replicated F Statistic", axes = FALSE
)
abline(c(0, 1), col = 2)
abline(v = 1, h = 1, col = "gray", lty = 2, lwd = 2)
points(log10(d$f_report[-cl]), log10(d$f_replicated[-cl]),
  col = col[-cl], pch = 16,
  cex = 1.2, lwd = 0.8
)
points(log10(d$f_report[-cl]), log10(d$f_replicated[-cl]), cex = 1.2, lwd = 0.8)
points(log10(d$f_report[cl]), log10(d$f_replicated[cl]),
  col = col[cl], pch = 17,
  cex = 1.2, lwd = 0.8
)
points(log10(d$f_report[cl]), log10(d$f_replicated[cl]), cex = 1.2, lwd = 0.8, pch = 2)
box()
axis(1, at = c(-1:4), labels = c(0.1, 1, 10, 100, 1000, 10000))
axis(2, at = c(-1:4), labels = c(0.1, 1, 10, 100, 1000, 10000))
graphics.off()


# Figure A4

pdf("graphs/FigureA4a.pdf", width = 7, height = 7)
par(mar = c(4, 4, 1, 1))
plot(1,
  cex = 0.8, type = "n",
  xlim = c(-0.1, 4.2), ylim = c(-0.1, 4.2),
  xlab = "Traditional F Statistic",
  ylab = "H.W. Robust F Statistic", axes = FALSE
)
abline(c(0, 1), col = 2)
abline(v = 1, h = 1, col = "gray", lty = 2, lwd = 2)
points(log10(d$f_standard[-cl]), log10(d$f_robust[-cl]),
  col = "#77777750", pch = 16,
  cex = 1.2, lwd = 0.8
)
points(log10(d$f_standard[-cl]), log10(d$f_robust[-cl]), cex = 1.2, lwd = 0.8)
points(log10(d$f_standard[cl]), log10(d$f_robust[cl]),
  col = "#33333390", pch = 17,
  cex = 1.2, lwd = 0.8
)
points(log10(d$f_standard[cl]), log10(d$f_robust[cl]), cex = 1.2, lwd = 0.8, pch = 2)
box()
axis(1, at = c(-1:4), labels = c(0.1, 1, 10, 100, 1000, 10000))
axis(2, at = c(-1:4), labels = c(0.1, 1, 10, 100, 1000, 10000))
graphics.off()

pdf("graphs/FigureA4b.pdf", width = 7, height = 7)
par(mar = c(4, 4, 1, 1))
plot(1,
  cex = 0.8, type = "n",
  xlim = c(-0.1, 4.2), ylim = c(-0.1, 4.2),
  xlab = "H.W. Robust F Statistic",
  ylab = "Effective F Statistic", axes = FALSE
)
abline(c(0, 1), col = 2)
abline(v = 1, h = 1, col = "gray", lty = 2, lwd = 2)
points(log10(d$f_robust[-cl]), log10(d$f_effective[-cl]),
  col = "#77777750", pch = 16,
  cex = 1.2, lwd = 0.8
)
points(log10(d$f_robust[-cl]), log10(d$f_effective[-cl]), cex = 1.2, lwd = 0.8)
points(log10(d$f_robust[cl]), log10(d$f_effective[cl]),
  col = "#33333390", pch = 17,
  cex = 1.2, lwd = 0.8
)
points(log10(d$f_robust[cl]), log10(d$f_effective[cl]), cex = 1.2, lwd = 0.8, pch = 2)
box()
axis(1, at = c(-1:4), labels = c(0.1, 1, 10, 100, 1000, 10000))
axis(2, at = c(-1:4), labels = c(0.1, 1, 10, 100, 1000, 10000))
graphics.off()


pdf("graphs/FigureA4c.pdf", width = 7, height = 7)
par(mar = c(4, 4, 1, 1))
plot(1,
  cex = 0.8, type = "n",
  xlim = c(-0.1, 4.2), ylim = c(-0.1, 4.2),
  xlab = "Effective F Statistic",
  ylab = "Bootstrapped F Statistic", axes = FALSE
)
abline(c(0, 1), col = 2)
abline(v = 1, h = 1, col = "gray", lty = 2, lwd = 2)
points(log10(d$f_effective[-cl]), log10(d$f_boot[-cl]),
  col = "#77777750", pch = 16,
  cex = 1.2, lwd = 0.8
)
points(log10(d$f_effective[-cl]), log10(d$f_boot[-cl]), cex = 1.2, lwd = 0.8)
points(log10(d$f_effective[cl]), log10(d$f_boot[cl]),
  col = "#33333390", pch = 17,
  cex = 1.2, lwd = 0.8
)
points(log10(d$f_effective[cl]), log10(d$f_boot[cl]), cex = 1.2, lwd = 0.8, pch = 2)
box()
axis(1, at = c(-1:4), labels = c(0.1, 1, 10, 100, 1000, 10000))
axis(2, at = c(-1:4), labels = c(0.1, 1, 10, 100, 1000, 10000))
graphics.off()



pdf("graphs/FigureA4d.pdf", width = 7, height = 7)
par(mar = c(4, 4, 1, 1))
plot(1,
  cex = 0.8, type = "n",
  xlim = c(-0.1, 4.2), ylim = c(-0.1, 4.2),
  xlab = "Traditional F Statistic",
  ylab = "Bootstrapped F Statistic", axes = FALSE
)
abline(c(0, 1), col = 2)
abline(v = 1, h = 1, col = "gray", lty = 2, lwd = 2)
points(log10(d$f_standard[-cl]), log10(d$f_boot[-cl]),
  col = "#77777750", pch = 16,
  cex = 1.2, lwd = 0.8
)
points(log10(d$f_standard[-cl]), log10(d$f_boot[-cl]), cex = 1.2, lwd = 0.8)
points(log10(d$f_standard[cl]), log10(d$f_boot[cl]),
  col = "#33333390", pch = 17,
  cex = 1.2, lwd = 0.8
)
points(log10(d$f_standard[cl]), log10(d$f_boot[cl]), cex = 1.2, lwd = 0.8, pch = 2)
box()
axis(1, at = c(-1:4), labels = c(0.1, 1, 10, 100, 1000, 10000))
axis(2, at = c(-1:4), labels = c(0.1, 1, 10, 100, 1000, 10000))
graphics.off()
